#!/usr/bin/env python3
#
# This exports the emoji_names.py data set to a CSV file in the same
# format used as input for import_emoji_names_from_csv.  We use this
# as part of a test for the correctness of the import process (one can
# compare the exported CSV file to the original CSV file, and if the
# data round-tripped with no changes, we know everything is working
# correctly).
import argparse
import csv
import os
import re
import ujson

from emoji_setup_utils import get_emoji_code

from typing import Any, Dict, List

# Directory two levels above the directory that contains this script.
TOOLS_DIR_PATH = os.path.abspath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir))
# Parent of TOOLS_DIR_PATH; `node_modules` is looked up underneath it.
ZULIP_PATH = os.path.dirname(TOOLS_DIR_PATH)
# The `emoji.json` file is the same in all four emoji-datasource packages.
EMOJI_DATA_PATH = os.path.join(
    ZULIP_PATH, 'node_modules', 'emoji-datasource-google', 'emoji.json')

# Emoji code -> {'category': ..., 'sort_order': ...}.  Starts empty; it is
# filled by prepare_sorting_info() and read by get_sorting_info().
sorting_info = {}   # type: Dict[str, Any]

# Header row of the exported CSV, in column order.
column_names = [
    'Codepoint', 'New sorting info', 'zulip (main)', 'zulip (alternates)', 'explanation',
]

# Category name -> its one-based position (as a string).  The position is
# used as the prefix of the category in the "New sorting info" column.
category_index = {
    category_name: str(position)
    for position, category_name in enumerate((
        'Smileys & People',
        'Animals & Nature',
        'Food & Drink',
        'Activities',
        'Travel & Places',
        'Objects',
        'Symbols',
        'Flags',
        'Skin Tones',
    ), start=1)
}

# Matches one entry of the emoji name map, i.e. text shaped like this
# (illustrative values):
#     '1f600': {'canonical_name': 'grinning', 'aliases': ['grinning_face']},
# Named groups:
#   emoji_code     - the dict key: lowercase letters, digits and hyphens.
#   canonical_name - the canonical name, optionally starting with '+' or '-'.
#   aliases        - the raw text between the square brackets, with the
#                    quotes and separators still in place (may be empty).
# The pattern tolerates spaces/newlines between the two keys, but main()
# applies it with .search() to one line at a time, so only entries that fit
# on a single line are picked up there.
name_entry_regex = re.compile(r"'(?P<emoji_code>[a-z0-9-]+)': "
                              r"{'canonical_name': '(?P<canonical_name>[+-]?[a-z0-9_X-]+)',[\n ]+"
                              r"'aliases': \[(?P<aliases>('([+-]?[a-z0-9_, X-]+)'[, ]{0,2})*)\]},")
# Matches a comment line: four spaces, "# ", then the comment text up to the
# first carriage return, newline or tab.  The text is captured in the
# explanation_line group.
explanation_regex = re.compile(r"    # (?P<explanation_line>[^\r\n\t]+)")

def prepare_sorting_info() -> None:
    """Fill the module-level ``sorting_info`` table from ``emoji.json``.

    The file at ``EMOJI_DATA_PATH`` is parsed with ``ujson`` and, for every
    record in it, an entry keyed by ``get_emoji_code(record)`` is stored
    holding the record's category and its sort order rendered as a string
    left-padded with zeros to at least three characters.
    """
    with open(EMOJI_DATA_PATH) as data_file:
        records = ujson.load(data_file)  # type: List[Dict[str, Any]]

    for record in records:
        code = get_emoji_code(record)
        padded_order = str(record['sort_order']).strip().rjust(3, '0')
        sorting_info[code] = dict(category=record['category'], sort_order=padded_order)

def get_sorting_info(emoji_code: str) -> str:
    """Build the "New sorting info" cell for ``emoji_code``.

    The result has the form ``"<index>-<category> <sort_order>"``, where
    ``<index>`` comes from ``category_index`` and the other two parts come
    from the ``sorting_info`` entry of the emoji.

    Raises:
        KeyError: if ``emoji_code`` has no entry in ``sorting_info`` or its
            category is not listed in ``category_index``.
    """
    entry = sorting_info[emoji_code]
    category_name = entry['category']
    labelled_category = '-'.join((category_index[category_name], category_name))
    return labelled_category + ' ' + entry['sort_order']

def prepare_explanation(explanation_lines: List[str]) -> str:
    """Merge the collected explanation lines into a single string.

    The lines are joined with one space between them; an empty list yields
    the empty string.
    """
    single_space = ' '
    return single_space.join(explanation_lines)

def prepare_aliases(captured_aliases: str) -> str:
    """Turn the raw ``aliases`` capture into a plain comma-separated list.

    The input is split on ``', '``, leading/trailing single quotes are
    removed from every piece, and the pieces are joined back with ``', '``.
    An empty input yields the empty string.
    """
    unquoted = [piece.strip("'") for piece in captured_aliases.split(', ')]
    return ', '.join(unquoted)

def main() -> None:
    """Export the emoji name map to a CSV file.

    Parses ``--input-file`` and ``--output-file`` from the command line,
    loads the sorting metadata via ``prepare_sorting_info()``, and scans the
    input file line by line:

    * a line matched by ``name_entry_regex`` becomes one CSV row (emoji code,
      sorting info, canonical name, aliases, explanation);
    * otherwise, a line matched by ``explanation_regex`` is remembered and
      attached (space-joined with its neighbours) to the next entry row;
    * any other line is ignored.

    The rows, preceded by the ``column_names`` header, are written with the
    csv ``excel`` dialect, and the final line separator is trimmed off.
    """
    description = ("This script is used for exporting `emoji_names.py` to comma separated file. It "
                   "takes the path of output csv file and path to `emoji_names.py` as arguments.")
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        "--input-file", dest="input_file_path", type=str, metavar="<path>",
        default=os.path.join(TOOLS_DIR_PATH, "setup", "emoji", "emoji_names.py"),
        help="Path to the file from which data is to be read.")
    parser.add_argument(
        "--output-file", dest="output_file_path", type=str, metavar="<path>",
        default=os.path.join(TOOLS_DIR_PATH, "setup", "emoji", "emoji_names.csv"),
        help="Path to the output csv file.")

    args = parser.parse_args()
    prepare_sorting_info()
    output_data = [column_names, ]  # type: List[List[str]]
    # Explanation comment lines seen since the last emitted entry.
    explanation_lines = []  # type: List[str]
    with open(args.input_file_path) as input_fp:
        # Iterate the file lazily instead of materializing every line first.
        for line in input_fp:
            # Entries are checked first, so a line that is an entry is never
            # treated as an explanation.
            match = name_entry_regex.search(line)
            if match is not None:
                emoji_code = match.group('emoji_code')
                sort_info = get_sorting_info(emoji_code)
                canonical_name = match.group('canonical_name')
                aliases = prepare_aliases(match.group('aliases'))
                explanation = prepare_explanation(explanation_lines)
                output_data.append([
                    emoji_code,
                    sort_info,
                    canonical_name,
                    aliases,
                    explanation,
                ])
                # The pending explanation belongs to this entry only.
                explanation_lines = []
                continue

            match = explanation_regex.search(line)
            if match is not None:
                explanation_line = match.group('explanation_line').strip()
                explanation_lines.append(explanation_line)

    # The context manager closes the output file even if writing or
    # truncating raises.
    with open(args.output_file_path, 'w') as output_fp:
        writer = csv.writer(output_fp, dialect='excel')
        writer.writerows(output_data)
        # The CSV file exported by Google Sheets doesn't have a newline
        # character at the end. So we also strip the last newline character
        # so that the round-trip conversion test passes.
        # NOTE(review): the 'excel' dialect terminates rows with '\r\n',
        # while this trims len(os.linesep) characters; where os.linesep is
        # '\n' that appears to leave a trailing '\r' -- confirm this matches
        # the reference CSV before changing it.
        line_sep_len = len(os.linesep)
        output_fp.truncate(output_fp.tell() - line_sep_len)

# Run the export only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
